- Sun 09 February 2020
- Data Science
- Michael Lehotay
- #data-analysis, #visualization, #SQL
Don't forget to check final version against project 1 rubric and report template
If you combine your problem statement, executive summary, data dictionary, and conclusions/recommendations, you have an amazing README.md file that quickly aligns your audience to the contents of your project. Don't forget to cite your data sources!
Contents
intro
data description
connect to db, list tables
SQL query 1: battlegrounds & players
rating vs rank
ilvl vs rank
ilvl vs rating
achievement vs rank
win ratio vs rank
num matches vs win ratio
wins vs num matches
rating histogram
rank histogram
ilvl histogram
achievement histogram
pair plot
SQL query 2: all 3 leaderboards
ratings vs rank
SQL query 3 all 3 leaderboards & players
ratings vs rank grouped by leaderboard
ilvl vs rank grouped by leaderboard
ilvl vs rank grouped by player class
win ratio vs rank grouped by player class
win ratio vs rank grouped by faction
player class bar chart
player class bar chart stacked factions
win ratio bar chart by faction
mean rating bar chart by class
mean rank bar chart by class for all leaderboards
mean rank by player details
ratings histogram for all leaderboards
I recently scraped the PvP leaderboards from the World of Warcraft website and wrote them out to a SQLite database. Let's plot some charts of the data and see if anything interesting turns up.
There are three leaderboards: the 2v2 Arena, the 3v3 Arena, and the 10x10 Battleground. Each leaderboard lists the top 1000 players by rating. (I think these are Elo ratings.) Anyway, I saved the leaderboards to a SQL database as three separate tables. I also scraped a minimal amount of data from the profile pages of each of the characters on the leaderboards and saved the profile data to a fourth table.
import sqlite3
import altair as alt
import pandas as pd
query = '''
SELECT *
FROM sqlite_master
WHERE type='table'
'''
con = sqlite3.connect('data/wow.db')
pd.read_sql(query, con)
| type | name | tbl_name | rootpage | sql | |
|---|---|---|---|---|---|
| 0 | table | arena_2v2 | arena_2v2 | 2 | CREATE TABLE "arena_2v2" (\n"rank" INTEGER,\n ... |
| 1 | table | arena_3v3 | arena_3v3 | 32 | CREATE TABLE "arena_3v3" (\n"rank" INTEGER,\n ... |
| 2 | table | battlegrounds | battlegrounds | 62 | CREATE TABLE "battlegrounds" (\n"rank" INTEGER... |
| 3 | table | players | players | 93 | CREATE TABLE "players" (\n"name" TEXT,\n "tit... |
query = '''
SELECT name, rank, rating, wins, losses, achievement, ilvl
FROM battlegrounds
LEFT JOIN players
ON battlegrounds.url = players.URL
'''
con = sqlite3.connect('data/wow.db')
df = pd.read_sql(query, con)
con.close()
df
| name | rank | rating | wins | losses | achievement | ilvl | |
|---|---|---|---|---|---|---|---|
| 0 | Crdefender | 1 | 2163 | 81 | 4 | 18265.0 | 456.0 |
| 1 | Lifeswaplol | 1 | 2163 | 80 | 3 | 8945.0 | 462.0 |
| 2 | Wolf | 3 | 2153 | 77 | 4 | 18155.0 | 463.0 |
| 3 | Intricate | 4 | 2135 | 72 | 4 | 10530.0 | 455.0 |
| 4 | Jøkes | 5 | 2125 | 69 | 2 | 15550.0 | 459.0 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 995 | Evileretta | 995 | 1443 | 9 | 14 | 12470.0 | 452.0 |
| 996 | Idoless | 995 | 1443 | 10 | 12 | 13985.0 | 447.0 |
| 997 | Vyaz | 995 | 1443 | 8 | 3 | 18820.0 | 457.0 |
| 998 | Xwarlord | 995 | 1443 | 12 | 14 | 10505.0 | 447.0 |
| 999 | Thedarklord | 1000 | 1442 | 15 | 12 | 16195.0 | 437.0 |
1000 rows × 7 columns
df['num_matches'] = df['wins'] + df['losses']
df['win_ratio'] = df['wins'] / df['num_matches']
alt.Chart(df).mark_point().encode(x='rank', y='rating')
alt.Chart(df).mark_point().encode(x='rank', y='ilvl')
alt.Chart(df).mark_point().encode(alt.X('rating:Q', scale=alt.Scale(zero=False)),
alt.Y('ilvl:Q', scale=alt.Scale(zero=False)))
alt.Chart(df).mark_point().encode(x='rank', y='achievement')
alt.Chart(df).mark_point().encode(x='rank', y='win_ratio')
alt.Chart(df).mark_point().encode(alt.X('win_ratio:Q', scale=alt.Scale(zero=False)), y='num_matches')
alt.Chart(df).mark_point().encode(alt.Y('win_ratio:Q', scale=alt.Scale(zero=False)), x='num_matches')
alt.Chart(df).mark_point().encode(x='num_matches', y='wins')
alt.Chart(df).mark_bar().encode(alt.X("rating:Q", bin=True), y='count()')
alt.Chart(df).mark_bar().encode(alt.X("rank:Q", bin=True), y='count()')
alt.Chart(df).mark_bar().encode(alt.X("ilvl:Q", bin=True), y='count()')
alt.Chart(df).mark_bar().encode(alt.X("achievement:Q", bin=True), y='count()')
alt.Chart(df).mark_circle().encode(
alt.X(alt.repeat("column"), type='quantitative', scale=alt.Scale(zero=False)),
alt.Y(alt.repeat("row"), type='quantitative', scale=alt.Scale(zero=False))
).properties(
width=100,
height=100
).repeat(
row=['rank', 'rating', 'win_ratio', 'ilvl', 'achievement'],
column=['rank', 'rating', 'win_ratio', 'ilvl', 'achievement']
)
query = '''
SELECT *, '2v2' as board
FROM arena_2v2
UNION ALL
SELECT *, '3v3' as board
FROM arena_3v3
UNION ALL
SELECT *, 'battlegrounds' as board
FROM battlegrounds
'''
con = sqlite3.connect('data/wow.db')
df_boards = pd.read_sql(query, con)
con.close()
df_boards
| rank | rating | player | class | faction | realm | wins | losses | url | board | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 2611 | Dinoe | Druid | ALLIANCE | Sargeras | 209 | 25 | https://worldofwarcraft.com/en-us/character/us... | 2v2 |
| 1 | 2 | 2591 | Thugonomiczz | Warlock | ALLIANCE | Stormrage | 95 | 9 | https://worldofwarcraft.com/en-us/character/us... | 2v2 |
| 2 | 3 | 2539 | Drãke | Monk | ALLIANCE | Stormrage | 118 | 36 | https://worldofwarcraft.com/en-us/character/us... | 2v2 |
| 3 | 4 | 2514 | Niarb | Paladin | ALLIANCE | Laughing Skull | 98 | 42 | https://worldofwarcraft.com/en-us/character/us... | 2v2 |
| 4 | 5 | 2499 | Kubyzy | Druid | ALLIANCE | Kel'Thuzad | 115 | 21 | https://worldofwarcraft.com/en-us/character/us... | 2v2 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2994 | 995 | 1443 | Evileretta | Warlock | ALLIANCE | Vashj | 9 | 14 | https://worldofwarcraft.com/en-us/character/us... | battlegrounds |
| 2995 | 995 | 1443 | Idoless | Priest | HORDE | Magtheridon | 10 | 12 | https://worldofwarcraft.com/en-us/character/us... | battlegrounds |
| 2996 | 995 | 1443 | Vyaz | Rogue | ALLIANCE | Stormrage | 8 | 3 | https://worldofwarcraft.com/en-us/character/us... | battlegrounds |
| 2997 | 995 | 1443 | Xwarlord | Priest | HORDE | Firetree | 12 | 14 | https://worldofwarcraft.com/en-us/character/us... | battlegrounds |
| 2998 | 1000 | 1442 | Thedarklord | Mage | ALLIANCE | Sargeras | 15 | 12 | https://worldofwarcraft.com/en-us/character/us... | battlegrounds |
2999 rows × 10 columns
alt.Chart(df_boards).mark_point().encode(x='rank', y='rating', color='board')
query = '''
SELECT board, rank, rating, name, title, realm, class, details,
faction, wins, losses, achievement, ilvl, players.url
FROM (
SELECT *, '2v2 arenas' as board
FROM arena_2v2
UNION
SELECT *, '3v3 arenas' as board
FROM arena_3v3
UNION
SELECT *, 'battlegrounds' as board
FROM battlegrounds
) leaderboards
JOIN players
WHERE players.url = leaderboards.url
'''
con = sqlite3.connect('data/wow.db')
df_boards = pd.read_sql(query, con)
con.close()
df_boards
| board | rank | rating | name | title | realm | class | details | faction | wins | losses | achievement | ilvl | url | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2v2 arenas | 1 | 2611 | Dinoe | Notorious Gladiator | Sargeras | Druid | 120 Night Elf Restoration Druid | ALLIANCE | 209 | 25 | 22180 | 465 | https://worldofwarcraft.com/en-us/character/us... |
| 1 | 3v3 arenas | 113 | 2155 | Dinoe | Notorious Gladiator | Sargeras | Druid | 120 Night Elf Restoration Druid | ALLIANCE | 141 | 50 | 22180 | 465 | https://worldofwarcraft.com/en-us/character/us... |
| 2 | 2v2 arenas | 2 | 2591 | Thugonomiczz | Wrathful Gladiator | Stormrage | Warlock | 120 Dwarf Destruction Warlock | ALLIANCE | 95 | 9 | 15320 | 465 | https://worldofwarcraft.com/en-us/character/us... |
| 3 | 2v2 arenas | 3 | 2539 | Drãke | Dread Gladiator | Stormrage | Monk | 120 Human Windwalker Monk | ALLIANCE | 118 | 36 | 10075 | 469 | https://worldofwarcraft.com/en-us/character/us... |
| 4 | 3v3 arenas | 12 | 2395 | Drãke | Dread Gladiator | Stormrage | Monk | 120 Human Windwalker Monk | ALLIANCE | 202 | 83 | 10075 | 469 | https://worldofwarcraft.com/en-us/character/us... |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2965 | battlegrounds | 995 | 1443 | Xwarlord | High Warlord | Firetree | Priest | 120 Blood Elf Holy Priest | HORDE | 12 | 14 | 10505 | 447 | https://worldofwarcraft.com/en-us/character/us... |
| 2966 | battlegrounds | 995 | 1443 | Aviana | the Insane | Doomhammer | Priest | 120 Night Elf Holy Priest | ALLIANCE | 12 | 14 | 28380 | 457 | https://worldofwarcraft.com/en-us/character/us... |
| 2967 | battlegrounds | 995 | 1443 | Idoless | Warlord | Magtheridon | Priest | 120 Blood Elf Discipline Priest | HORDE | 10 | 12 | 13985 | 447 | https://worldofwarcraft.com/en-us/character/us... |
| 2968 | battlegrounds | 995 | 1443 | Evileretta | Justicar | Vashj | Warlock | 120 Human Destruction Warlock | ALLIANCE | 9 | 14 | 12470 | 452 | https://worldofwarcraft.com/en-us/character/us... |
| 2969 | battlegrounds | 1000 | 1442 | Thedarklord | Vanquisher | Sargeras | Mage | 120 Human Frost Mage | ALLIANCE | 15 | 12 | 16195 | 437 | https://worldofwarcraft.com/en-us/character/us... |
2970 rows × 14 columns
alt.Chart(df_boards).mark_point().encode(x='rank', y='rating', color='board')
alt.Chart(df_boards).mark_point().encode(alt.X('rank:Q', scale=alt.Scale(zero=False)),
alt.Y('rating:Q', scale=alt.Scale(zero=False)),
color='board')
alt.Chart(df_boards).mark_point(opacity=0.75).encode(alt.X('rank:Q', scale=alt.Scale(zero=False)),
alt.Y('ilvl:Q', scale=alt.Scale(zero=False)),
color='board')
alt.Chart(df_boards).mark_point(opacity=0.75, clip=True).encode(alt.X('rank:Q', scale=alt.Scale(zero=False)),
alt.Y('ilvl:Q', scale=alt.Scale(domain=(420, 480))), color='board')
alt.Chart(df_boards).mark_point(opacity=0.75, clip=True).encode(alt.X('rank:Q', scale=alt.Scale(zero=False)),
alt.Y('ilvl:Q', scale=alt.Scale(domain=(420, 480))),
color='class')
df_boards['num_matches'] = df_boards['wins'] + df_boards['losses']
df_boards['win_ratio'] = df_boards['wins'] / df_boards['num_matches']
alt.Chart(df_boards).mark_circle().encode(alt.X('rank:Q', scale=alt.Scale(zero=False)),
alt.Y('win_ratio:Q', scale=alt.Scale(zero=False)), color='class')
alt.Chart(df_boards).mark_circle().encode(alt.X('rank:Q', scale=alt.Scale(zero=False)),
alt.Y('win_ratio:Q', scale=alt.Scale(zero=False)),
color='faction')
alt.Chart(df_boards).mark_bar().encode(x='class', y='count()')
alt.Chart(df_boards).mark_bar().encode(
x='class',
y='count()',
color='faction'
)
alt.Chart(df_boards).mark_bar().encode(
x='faction',
y='win_ratio',
)
bar = alt.Chart(df_boards).mark_bar().encode(
x='class:O',
y='mean(rating):Q'
)
rule = alt.Chart(df_boards).mark_rule(color='red').encode(
y='mean(rating):Q'
)
(bar + rule).properties(width=600)
bar = alt.Chart(df_boards).mark_bar().encode(
alt.Y('mean(rating)', scale=alt.Scale(zero=False)),
x='class'
)
rule = alt.Chart(df_boards).mark_rule(color='red').encode(
y='mean(rating)'
)
(bar + rule).properties(width=500)
df_boards['rank2'] = 1000 - df_boards['rank']
alt.Chart(df_boards).mark_bar().encode(
alt.Y('mean(rank2)', scale=alt.Scale(zero=False)),
x='class',
column='board'
)
alt.Chart(df_boards).mark_bar().encode(
alt.X('class'),
alt.Y('mean(rank2)', scale=alt.Scale(zero=False)),
alt.Color('board'),
alt.Column('board')
)
bar = alt.Chart(df_boards).mark_bar().encode(
alt.X('mean(rank2)', scale=alt.Scale(zero=False)),
alt.Y('details', sort='-x'),
)
rule = alt.Chart(df_boards).mark_rule(color='red').encode(
x='mean(rank2)'
)
(bar + rule)
alt.Chart(df_boards).transform_fold(
['2v2 arenas', '3v3 arenas', 'battlegrounds'],
as_=['Leaderboard', '# Characters']
).mark_area(
opacity=0.5,
interpolate='step'
).encode(
alt.X('rating:Q', bin=alt.Bin(maxbins=100)),
alt.Y('count()', stack=None),
alt.Color('board:N')
)